Metadata basics
First do the plots with kruskall wallis comparison between groups to
see if there’s an overall difference, then do the pairwise tests because
KW says there is a difference.
metadata <- read_metadata(metadata_handle)
metadata <- metadata %>% mutate(Group = if_else(Group == 'Control_HealthySerosurvey', 'Healthy Control', Group))
metadata <- metadata %>% mutate(Group = if_else(Group == 'Acute_Typhi', 'Acute typhoid', Group))
number_per_country <- metadata %>% group_by(Country) %>% summarise(count = n())
number_per_country %>% kbl() %>% kable_styling()
|
Country
|
count
|
|
Bangladesh
|
120
|
|
Malawi
|
114
|
|
Nepal
|
76
|
print(sum(number_per_country$count))
## [1] 310
metadata %>% group_by(Group) %>% summarise(count = n()) %>% kbl() %>% kable_styling()
|
Group
|
count
|
|
Acute typhoid
|
103
|
|
Carrier
|
110
|
|
Healthy Control
|
97
|
baseline_chars <- get_baseline_characteristics(metadata)
# baseline_chars$pct_anti
# baseline_chars$pct_anti %>% na_if(0)
# BEWARE - na_if(0.0) will convert ALL 0s to NAs, this is ok as they're only in the antibiotics of carriers and controls
# at the moment, but if others get added in, will screw with it.
# baseline_chars %>% rename(`Median age` = median_age, `Women (%)` = pct_fem, `Antibiotics in last 2 weeks (%)` = pct_anti) %>% pivot_longer(!c(Country, Group)) %>% rename(variable_name = name) %>% pivot_wider(names_from = Group, values_from = value) %>% filter(variable_name != 'number') %>% na_if(0.0) %>% kbl(digits = c(NA, NA, 1, 1, 1)) %>% kable_styling()
# na_if stopped working, so had to do this instead
baseline_chars_table <- baseline_chars %>% rename(`Median age` = median_age, `Women (%)` = pct_fem, `Antibiotics in last 2 weeks (%)` = pct_anti) %>% pivot_longer(!c(Country, Group)) %>% rename(variable_name = name) %>% pivot_wider(names_from = Group, values_from = value) %>% filter(variable_name != 'number')
baseline_chars_table$Carrier <- replace(baseline_chars_table$Carrier,which(baseline_chars_table$Carrier==0),NA)
baseline_chars_table$`Healthy Control` <- replace(baseline_chars_table$`Healthy Control`,which(baseline_chars_table$`Healthy Control`==0),NA)
baseline_chars_table %>% kbl(digits = c(NA, NA, 1, 1, 1)) %>% kable_styling()
|
Country
|
variable_name
|
Acute typhoid
|
Carrier
|
Healthy Control
|
|
Bangladesh
|
Median age
|
6.0
|
37.0
|
28.5
|
|
Bangladesh
|
Women (%)
|
47.5
|
47.5
|
65.0
|
|
Bangladesh
|
Antibiotics in last 2 weeks (%)
|
37.5
|
NA
|
NA
|
|
Malawi
|
Median age
|
9.7
|
32.0
|
24.0
|
|
Malawi
|
Women (%)
|
64.7
|
57.5
|
65.0
|
|
Malawi
|
Antibiotics in last 2 weeks (%)
|
26.5
|
NA
|
NA
|
|
Nepal
|
Median age
|
17.2
|
43.9
|
35.0
|
|
Nepal
|
Women (%)
|
24.1
|
66.7
|
82.4
|
|
Nepal
|
Antibiotics in last 2 weeks (%)
|
44.8
|
NA
|
NA
|
# the kruskal wallis test is positive, showing a difference between the groups, so we move onto pairwise tests.
# ggplot(metadata, aes(x = Country, y = Age, fill = Group)) + geom_boxplot() + stat_compare_means(method = 'kruskal.test', label = "p")
comparisons_groups <- list(c("Acute typhoid", "Carrier"), c("Acute typhoid", "Healthy Control"), c("Carrier", "Healthy Control"))
comparisons_countries <- list(c('Bangladesh', 'Malawi'), c('Bangladesh', 'Nepal'), c('Malawi', 'Nepal'))
# default stat method when doing pairwise tests in wilcoxon.
ggboxplot(metadata, facet.by = "Country", y = "Age", x = "Group", color = "Group") + stat_compare_means(comparisons = comparisons_groups) + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

ggboxplot(metadata, facet.by = "Group", y = "Age", x = "Country", color = "Country") + stat_compare_means(comparisons = comparisons_countries) + rotate_x_text(angle = 45)

country_group_sex <- metadata %>% group_by(Country, Group, Sex) %>% summarise(count = n())
plot_sex <- function(eg1, c){
d <- eg1 %>% filter(Country == c)
p <- ggplot(d, aes(x = Group, y = count, fill = Sex)) +
geom_bar(stat ='identity', position = 'fill') +
ylab('Proportion') +
ggtitle(c)# +
#theme(axis.text=element_text(size=34), axis.title=element_text(size=36,face="bold"), plot.title = element_text(size = 40, face = "bold"), legend.key.size = unit(4, 'cm'), legend.title = element_text(size = 34), legend.text = element_text(size = 28))
return(p)
}
m_sex <- plot_sex(country_group_sex, 'Malawi')
b_sex <- plot_sex(country_group_sex, 'Bangladesh')
n_sex <- plot_sex(country_group_sex, 'Nepal')
m_sex / b_sex / n_sex

all countries, all
groups
Alpha diversity -
metaphlan
Alpha diversity - all countries, all groups
all_countries_all_groups_alpha <- metaphlan_alpha(strataa_metaphlan_data, strataa_metaphlan_metadata, countries_of_interest = c('Bangladesh', 'Malawi', 'Nepal'), groups_of_interest = c('Acute_Typhi', 'Carrier', 'Control_HealthySerosurvey'), comparisons = list(c('Acute_Typhi', 'Carrier'), c('Acute_Typhi', 'Control_HealthySerosurvey'), c('Control_HealthySerosurvey', 'Carrier')))

all_countries_all_groups_alpha$alpha_anova_summary_with_var_names %>% dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>% kbl() %>% kable_styling()
|
rownames(alpha_anova_summary[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Country:Group
|
4
|
7.82
|
1.96
|
11.7
|
9.31e-09
|
significant
|
|
Group
|
2
|
5.92
|
2.96
|
17.6
|
6.23e-08
|
significant
|
|
Country
|
2
|
2.75
|
1.38
|
8.2
|
0.000348
|
significant
|
|
Country:Sex
|
2
|
1.15
|
0.575
|
3.42
|
0.034
|
not_significant
|
|
Sex:Group
|
2
|
1.08
|
0.539
|
3.21
|
0.0417
|
not_significant
|
|
Country:Sex:Age
|
2
|
1.07
|
0.534
|
3.18
|
0.0431
|
not_significant
|
|
Country:Age
|
2
|
0.61
|
0.305
|
1.82
|
0.164
|
not_significant
|
|
Country:Sex:Group
|
4
|
1.08
|
0.27
|
1.61
|
0.172
|
not_significant
|
|
Sex:Age
|
1
|
0.308
|
0.308
|
1.84
|
0.177
|
not_significant
|
|
Country:Group:Age
|
4
|
0.834
|
0.209
|
1.24
|
0.293
|
not_significant
|
|
Group:Age
|
2
|
0.228
|
0.114
|
0.68
|
0.507
|
not_significant
|
|
Sex
|
1
|
0.0294
|
0.0294
|
0.175
|
0.676
|
not_significant
|
|
Sex:Group:Age
|
2
|
0.101
|
0.0506
|
0.302
|
0.74
|
not_significant
|
|
Country:Sex:Group:Age
|
4
|
0.0673
|
0.0168
|
0.1
|
0.982
|
not_significant
|
|
Age
|
1
|
1.07e-08
|
1.07e-08
|
6.36e-08
|
1
|
not_significant
|
|
Residuals
|
274
|
46
|
0.168
|
NA
|
NA
|
NA
|
all_countries_all_groups_alpha$alpha_plot_group

all_countries_all_groups_alpha$alpha_plot_antibiotics

all_countries_all_groups_alpha$alpha_by_country
### Beta diversity - metaphlan
All groups.
all_countries_beta_all_groups <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Bangladesh', 'Malawi', 'Nepal'), c('Acute_Typhi', 'Carrier', 'Control_HealthySerosurvey'))
all_countries_beta_all_groups$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0759082
|
0.0009990
|
significant
|
|
Group:Age
|
0.0123544
|
0.0049950
|
significant
|
|
Sex:Group:Age
|
0.0081926
|
0.1058941
|
not_significant
|
|
Sex:Group
|
0.0079745
|
0.1118881
|
not_significant
|
|
Age
|
0.0038070
|
0.1698302
|
not_significant
|
|
Sex
|
0.0035891
|
0.2117882
|
not_significant
|
|
Sex:Age
|
0.0037273
|
0.2247752
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.8844470
|
NA
|
NA
|
bgd_beta_all_groups <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Bangladesh'), c('Acute_Typhi', 'Carrier', 'Control_HealthySerosurvey'))
bgd_beta_all_groups$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.2679584
|
0.0009990
|
significant
|
|
Sex:Age
|
0.0094338
|
0.1098901
|
not_significant
|
|
Age
|
0.0084979
|
0.1838162
|
not_significant
|
|
Sex
|
0.0078067
|
0.2047952
|
not_significant
|
|
Sex:Group
|
0.0155854
|
0.2147852
|
not_significant
|
|
Group:Age
|
0.0114756
|
0.5074925
|
not_significant
|
|
Sex:Group:Age
|
0.0097058
|
0.6913087
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.6695363
|
NA
|
NA
|
# bgd_beta_all_groups$pc12
# bgd_beta_all_groups$pc34
mal_beta_all_groups <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Malawi'), c('Acute_Typhi', 'Carrier', 'Control_HealthySerosurvey'))
mal_beta_all_groups$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.1942771
|
0.0009990
|
significant
|
|
Sex:Group
|
0.0231539
|
0.0289710
|
not_significant
|
|
Sex:Age
|
0.0138745
|
0.0289710
|
not_significant
|
|
Group:Age
|
0.0232750
|
0.0369630
|
not_significant
|
|
Age
|
0.0114622
|
0.0609391
|
not_significant
|
|
Sex
|
0.0075054
|
0.3236763
|
not_significant
|
|
Sex:Group:Age
|
0.0125237
|
0.5934066
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.7139281
|
NA
|
NA
|
# mal_beta_all_groups$pc12
# mal_beta_all_groups$pc34
nep_beta_all_groups <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Nepal'), c('Acute_Typhi', 'Carrier', 'Control_HealthySerosurvey'))
nep_beta_all_groups$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0898195
|
0.0009990
|
significant
|
|
Sex
|
0.0176933
|
0.1118881
|
not_significant
|
|
Sex:Age
|
0.0150035
|
0.2247752
|
not_significant
|
|
Age
|
0.0125478
|
0.4445554
|
not_significant
|
|
Sex:Group
|
0.0239139
|
0.5294705
|
not_significant
|
|
Sex:Group:Age
|
0.0175838
|
0.8611389
|
not_significant
|
|
Group:Age
|
0.0164764
|
0.9410589
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.8069619
|
NA
|
NA
|
all_countries_beta_all_groups$pc12 / bgd_beta_all_groups$pc12 / nep_beta_all_groups$pc12

Healthy vs Typhi
Plot phyla bar
plots
phyla <- strataa_metaphlan_data %>% mutate(clade_name = rownames(strataa_metaphlan_data)) %>% filter(grepl("p__", clade_name)) %>% filter(!grepl("c__", clade_name)) %>% pivot_longer(!c(clade_name, lowest_taxonomic_level), names_to = "sample", values_to = "relative_abundance")
metadata_for_phyla_plots <- strataa_metaphlan_metadata %>% dplyr::select(SampleID, Group, Country)
phyla_clean_metadata <- prep_data_to_plot_phyla(phyla, strataa_metaphlan_metadata)
order_of_groups <- c("Typhi", "Healthy")
bangladesh_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Bangladesh", group_order = order_of_groups)
# bangladesh_phyla_plot
malawi_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Malawi", group_order = order_of_groups)
nepal_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Nepal", group_order = order_of_groups)
bangladesh_phyla_plot / malawi_phyla_plot / nepal_phyla_plot

# going back to phyla so that get all the weird rare ones.
# need to join to meta-data
# remove carriers
# group by clade, group, country
# summarise to get the median.
# pivot wider to get the table.
# dplyr::select(order(colnames(.))) rearranges the columns in alphabetical order
# relocate moves the clade_name column to the first column.
phyla %>% left_join(metadata_for_phyla_plots, by = c("sample" = "SampleID")) %>% filter(Group != 'Carrier') %>% group_by(clade_name, Group, Country) %>% summarise(median_prevalence = median(relative_abundance)) %>% pivot_wider(names_from = c('Country', 'Group'), values_from = 'median_prevalence') %>% dplyr::select(order(colnames(.))) %>% relocate(clade_name, .before = 1) %>% kbl() %>% kable_styling()
|
clade_name
|
Bangladesh_Acute_Typhi
|
Bangladesh_Control_HealthySerosurvey
|
Malawi_Acute_Typhi
|
Malawi_Control_HealthySerosurvey
|
Nepal_Acute_Typhi
|
Nepal_Control_HealthySerosurvey
|
|
k__Archaea|p__Candidatus_Thermoplasmatota
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Archaea|p__Euryarchaeota
|
0.000000
|
0.000000
|
0.249010
|
0.000000
|
0.00000
|
0.22474
|
|
k__Archaea|p__Thaumarchaeota
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Acidobacteria
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Actinobacteria
|
14.361185
|
13.136440
|
1.893725
|
0.524965
|
3.33634
|
3.81550
|
|
k__Bacteria|p__Bacteria_unclassified
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Bacteroidetes
|
0.911970
|
2.216730
|
15.361165
|
34.630185
|
28.24679
|
20.53695
|
|
k__Bacteria|p__Candidatus_Melainabacteria
|
0.000000
|
0.000000
|
0.038085
|
0.052210
|
0.00000
|
0.02157
|
|
k__Bacteria|p__Candidatus_Saccharibacteria
|
0.017310
|
0.015625
|
0.000000
|
0.000000
|
0.00000
|
0.00203
|
|
k__Bacteria|p__Chloroflexi
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Elusimicrobia
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Firmicutes
|
80.670075
|
72.161565
|
60.161350
|
58.311770
|
53.74090
|
60.17250
|
|
k__Bacteria|p__Fusobacteria
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Lentisphaerae
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Proteobacteria
|
0.629715
|
1.272295
|
6.606440
|
6.195335
|
2.90174
|
2.52509
|
|
k__Bacteria|p__Spirochaetes
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Synergistetes
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Bacteria|p__Tenericutes
|
0.000000
|
0.000000
|
0.002715
|
0.000000
|
0.01965
|
0.01362
|
|
k__Bacteria|p__Verrucomicrobia
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Eukaryota|p__Ascomycota
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
|
k__Eukaryota|p__Eukaryota_unclassified
|
0.000000
|
0.000000
|
0.000000
|
0.000000
|
0.00000
|
0.00000
|
alpha
diversity
Alpha diversity - all countries, healthy and acute
all_countries_healthy_acute_alpha <- metaphlan_alpha(strataa_metaphlan_data, strataa_metaphlan_metadata, countries_of_interest = c('Bangladesh', 'Malawi', 'Nepal'), groups_of_interest = c('Acute_Typhi', 'Control_HealthySerosurvey'), comparisons = list(c('Acute_Typhi', 'Control_HealthySerosurvey')))

# all_countries_healthy_acute_alpha$alpha_by_country
all_countries_healthy_acute_alpha$alpha_anova_summary_with_var_names %>% dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>% kbl() %>% kable_styling()
|
rownames(alpha_anova_summary[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Country
|
2
|
5.74
|
2.87
|
17.4
|
1.36e-07
|
significant
|
|
Country:Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
1.68
|
0.842
|
5.12
|
0.00699
|
significant
|
|
Sex:Age
|
1
|
1.07
|
1.07
|
6.5
|
0.0117
|
not_significant
|
|
Country:Sex
|
2
|
1.5
|
0.75
|
4.56
|
0.0119
|
not_significant
|
|
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
1.39
|
0.693
|
4.21
|
0.0165
|
not_significant
|
|
Country:Age
|
2
|
1.02
|
0.51
|
3.1
|
0.0477
|
not_significant
|
|
Sex:Group
|
1
|
0.628
|
0.628
|
3.82
|
0.0525
|
not_significant
|
|
Country:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
0.816
|
0.408
|
2.48
|
0.0871
|
not_significant
|
|
Country:Group
|
2
|
0.698
|
0.349
|
2.12
|
0.123
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
0.618
|
0.309
|
1.88
|
0.157
|
not_significant
|
|
Group
|
1
|
0.164
|
0.164
|
0.997
|
0.319
|
not_significant
|
|
Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
0.359
|
0.179
|
1.09
|
0.339
|
not_significant
|
|
Sex
|
1
|
0.0592
|
0.0592
|
0.36
|
0.55
|
not_significant
|
|
Country:Sex:Age
|
2
|
0.188
|
0.0938
|
0.57
|
0.567
|
not_significant
|
|
Sex:Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.0511
|
0.0511
|
0.31
|
0.578
|
not_significant
|
|
Sex:Group:Age
|
1
|
0.0368
|
0.0368
|
0.224
|
0.637
|
not_significant
|
|
Group:Age
|
1
|
0.0147
|
0.0147
|
0.0891
|
0.766
|
not_significant
|
|
Country:Group:Age
|
2
|
0.0713
|
0.0357
|
0.217
|
0.805
|
not_significant
|
|
Age
|
1
|
0.00772
|
0.00772
|
0.0469
|
0.829
|
not_significant
|
|
Country:Sex:Group
|
2
|
0.0595
|
0.0297
|
0.181
|
0.835
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.00675
|
0.00675
|
0.041
|
0.84
|
not_significant
|
|
Country:Sex:Group:Age
|
2
|
0.05
|
0.025
|
0.152
|
0.859
|
not_significant
|
|
Country:Sex:Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.000136
|
0.000136
|
0.000828
|
0.977
|
not_significant
|
|
Residuals
|
163
|
26.8
|
0.165
|
NA
|
NA
|
NA
|
all_countries_healthy_acute_alpha$alpha_plot_group

all_countries_healthy_acute_alpha$alpha_plot_antibiotics

alpha diversity, malawi only
malawi_healthy_acute_alpha <- metaphlan_alpha(strataa_metaphlan_data, strataa_metaphlan_metadata, countries_of_interest = c('Malawi'), groups_of_interest = c('Acute_Typhi', 'Control_HealthySerosurvey'), comparisons = list(c('Acute_Typhi', 'Control_HealthySerosurvey')))

malawi_healthy_acute_alpha$alpha_anova_summary_with_var_names %>% dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>% kbl() %>% kable_styling()
|
rownames(alpha_anova_summary[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
2.27
|
2.27
|
16
|
0.000167
|
significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
1.31
|
1.31
|
9.27
|
0.0034
|
significant
|
|
Sex
|
1
|
1.16
|
1.16
|
8.21
|
0.00567
|
significant
|
|
Group
|
1
|
0.84
|
0.84
|
5.93
|
0.0177
|
not_significant
|
|
Age
|
1
|
0.451
|
0.451
|
3.19
|
0.079
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.406
|
0.406
|
2.87
|
0.0952
|
not_significant
|
|
Sex:Group
|
1
|
0.313
|
0.313
|
2.21
|
0.142
|
not_significant
|
|
Sex:Group:Age
|
1
|
0.0978
|
0.0978
|
0.691
|
0.409
|
not_significant
|
|
Sex:Age
|
1
|
0.0164
|
0.0164
|
0.116
|
0.734
|
not_significant
|
|
Group:Age
|
1
|
0.00122
|
0.00122
|
0.00864
|
0.926
|
not_significant
|
|
Residuals
|
63
|
8.91
|
0.141
|
NA
|
NA
|
NA
|
malawi_healthy_acute_alpha$alpha_plot_group

malawi_healthy_acute_alpha$alpha_plot_antibiotics

beta diversity
Acute vs healthy.
all_countries_beta_acute_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Bangladesh', 'Malawi', 'Nepal'), c('Acute_Typhi', 'Control_HealthySerosurvey'))
all_countries_beta_acute_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0285252
|
0.0009990
|
significant
|
|
Group:Age
|
0.0117687
|
0.0059940
|
significant
|
|
Age
|
0.0092014
|
0.0399600
|
not_significant
|
|
Sex
|
0.0070879
|
0.1178821
|
not_significant
|
|
Sex:Group:Age
|
0.0072077
|
0.1268731
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0117438
|
0.2057942
|
not_significant
|
|
Sex:Age
|
0.0058394
|
0.2337662
|
not_significant
|
|
Sex:Group
|
0.0057923
|
0.2627373
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0055130
|
0.2757243
|
not_significant
|
|
Sex:Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0043096
|
0.5254745
|
not_significant
|
|
Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0070480
|
0.8391608
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.8959630
|
NA
|
NA
|
bgd_beta_acute_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Bangladesh'), c('Acute_Typhi', 'Control_HealthySerosurvey'))
bgd_beta_acute_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0782718
|
0.0009990
|
significant
|
|
Sex
|
0.0230592
|
0.0379620
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0199564
|
0.0629371
|
not_significant
|
|
Sex:Age
|
0.0168558
|
0.1398601
|
not_significant
|
|
Age
|
0.0146689
|
0.2367632
|
not_significant
|
|
Group:Age
|
0.0109377
|
0.4795205
|
not_significant
|
|
Sex:Group:Age
|
0.0100500
|
0.5384615
|
not_significant
|
|
Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0080933
|
0.6863137
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0074906
|
0.8311688
|
not_significant
|
|
Sex:Group
|
0.0067483
|
0.9140859
|
not_significant
|
|
Sex:Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0059228
|
0.9230769
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.7979452
|
NA
|
NA
|
mal_beta_acute_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Malawi'), c('Acute_Typhi', 'Control_HealthySerosurvey'))
mal_beta_acute_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.1995900
|
0.0009990
|
significant
|
|
Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0371210
|
0.0009990
|
significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0381931
|
0.0029970
|
significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0267524
|
0.0169830
|
not_significant
|
|
Sex:Group
|
0.0200496
|
0.0279720
|
not_significant
|
|
Age
|
0.0194908
|
0.0319680
|
not_significant
|
|
Sex
|
0.0141068
|
0.1228771
|
not_significant
|
|
Sex:Age
|
0.0144797
|
0.1328671
|
not_significant
|
|
Sex:Group:Age
|
0.0091754
|
0.4915085
|
not_significant
|
|
Group:Age
|
0.0078947
|
0.6113886
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.6131466
|
NA
|
NA
|
nep_beta_acute_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Nepal'), c('Acute_Typhi', 'Control_HealthySerosurvey'))
nep_beta_acute_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0598990
|
0.0019980
|
significant
|
|
Sex
|
0.0484279
|
0.0149850
|
not_significant
|
|
Sex:Group
|
0.0246755
|
0.3126873
|
not_significant
|
|
Sex:Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0244159
|
0.3196803
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0450942
|
0.4175824
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0207951
|
0.4925075
|
not_significant
|
|
Sex:Group:Age
|
0.0175717
|
0.6813187
|
not_significant
|
|
Age
|
0.0154038
|
0.7932068
|
not_significant
|
|
Sex:Age
|
0.0148604
|
0.8301698
|
not_significant
|
|
Group:Age
|
0.0102689
|
0.9900100
|
not_significant
|
|
Age:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0188359
|
0.9980020
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.6997518
|
NA
|
NA
|
all_countries_beta_acute_healthy$pc12 + bgd_beta_acute_healthy$pc12 + mal_beta_acute_healthy$pc12 + nep_beta_acute_healthy$pc12

maaslin2
taxonomy
Maaslin basics
groups_to_analyse <- c('Acute_Typhi', 'Control_HealthySerosurvey')
bang_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions")
mwi_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions", "sequencing_lane")
nep_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions")
bangladesh_taxonomic_maaslin <- read_in_maaslin('Bangladesh', groups_to_analyse, bang_variables_for_analysis, 'metaphlan')
malawi_taxonomic_maaslin <- read_in_maaslin('Malawi', groups_to_analyse, mwi_variables_for_analysis, 'metaphlan')
nepal_taxonomic_maaslin <- read_in_maaslin('Nepal', groups_to_analyse, nep_variables_for_analysis, 'metaphlan')
bangladesh_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(bangladesh_taxonomic_maaslin)
malawi_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(malawi_taxonomic_maaslin)
nepal_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(nepal_taxonomic_maaslin)
bangladesh_maaslin_stats <- basic_maaslin_stats(bangladesh_taxonomic_maaslin_filtered, 'Bangladesh', bang_variables_for_analysis, groups_to_analyse)
malawi_maaslin_stats <- basic_maaslin_stats(malawi_taxonomic_maaslin_filtered, 'Malawi', mwi_variables_for_analysis, groups_to_analyse)
nepal_maaslin_stats <- basic_maaslin_stats(nepal_taxonomic_maaslin_filtered, 'Nepal', nep_variables_for_analysis, groups_to_analyse)
malawi_maaslin_stats$volcano_plot / bangladesh_maaslin_stats$volcano_plot / nepal_maaslin_stats$volcano_plot

nrow(malawi_maaslin_stats$maaslin_results_sig)
## [1] 296
nrow(bangladesh_maaslin_stats$maaslin_results_sig)
## [1] 23
nrow(nepal_maaslin_stats$maaslin_results_sig)
## [1] 0
There were 296 species significantly (q-val < 0.05) associated
with health/disease in Malawi, in Bangladesh, and in Nepal.
Combine the taxonomic maaslins, and print out the species that are
sig in both and share directions.
Because sequencing run and participant type are totally confounded
for Bangladesh, need to exclude sequencing run from the final model for
Bangladesh (otherwise, wipes out the signals).
associated at both sites
bang_mal <- list(bangladesh_taxonomic_maaslin_filtered, malawi_taxonomic_maaslin_filtered)
combined_results <- run_combine_maaslins(bang_mal, c('_bang', '_mal'), mwi_variables_for_analysis, groups_to_analyse, 'metaphlan', maaslin_taxonomic_output_root_folder)
# View(combined_results$positive_coef)
# View(combined_results$negative_coef)
combined_results$positive_coef %>% filter(grepl('^s', lowest_taxonomic_level)) %>%
select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
rename(Species = lowest_taxonomic_level, `Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
kbl() %>% kable_styling()
|
feature
|
Species
|
Coefficient Bangladesh
|
Standard Error Bangladesh
|
Q-value Bangladesh
|
Coefficient Malawi
|
Standard Error Malawi
|
Q-value Malawi
|
|
k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_SGB6179
|
s__Clostridium_SGB6179
|
8.79
|
1.34
|
4.25e-05
|
3.11
|
0.936
|
0.0286
|
|
k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_copri_clade_A
|
s__Prevotella_copri_clade_A
|
4.54
|
0.978
|
0.00971
|
7.64
|
1.25
|
1.21e-05
|
|
k__Bacteria.p__Firmicutes.c__Negativicutes.o__Veillonellales.f__Veillonellaceae.g__GGB4266.s__GGB4266_SGB5809
|
s__GGB4266_SGB5809
|
4.58
|
1.09
|
0.0147
|
6.94
|
0.981
|
4.38e-07
|
|
k__Bacteria.p__Proteobacteria.c__Gammaproteobacteria.o__Pasteurellales.f__Pasteurellaceae.g__Haemophilus.s__Haemophilus_parainfluenzae
|
s__Haemophilus_parainfluenzae
|
3.64
|
0.979
|
0.03
|
6.92
|
0.953
|
2.26e-07
|
|
k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Peptostreptococcaceae.g__Romboutsia.s__Romboutsia_timonensis
|
s__Romboutsia_timonensis
|
4.52
|
1.25
|
0.0386
|
5.07
|
0.903
|
5.91e-05
|
combined_results$negative_coef %>% filter(grepl('^s', lowest_taxonomic_level)) %>%
select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
rename(Species = lowest_taxonomic_level, `Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
kbl() %>% kable_styling()
|
feature
|
Species
|
Coefficient Bangladesh
|
Standard Error Bangladesh
|
Q-value Bangladesh
|
Coefficient Malawi
|
Standard Error Malawi
|
Q-value Malawi
|
|
k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lachnospiraceae_unclassified.s__Lachnospiraceae_bacterium
|
s__Lachnospiraceae_bacterium
|
-3.05
|
0.838
|
0.0366
|
-2.87
|
0.785
|
0.0135
|
# todo - refactoring the run_combine_maaslins means that we dont get the species that are only associated at one site. need to fix that.
# nrow(combined_results$mwi_maaslin_only)
# nrow(combined_results$bang_maaslin_only)
There were species significantly (q-val < 0.05) associated with
health/disease in Malawi only and in Bangladesh only.
The ones associated at only one site are written out to a file, you
can look at them manually there.
plots of species of
interest
strataa_metaphlan_data_longer <- strataa_metaphlan_data %>% mutate(feature = rownames(strataa_metaphlan_data)) %>% pivot_longer(!c(feature, lowest_taxonomic_level), names_to = "SampleID", values_to = "prevalence")
strataa_metaphlan_data_longer_meta <- strataa_metaphlan_data_longer %>% left_join(strataa_metaphlan_metadata, by = c("SampleID" = "SampleID"))
pc <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Prevotella_copri_clade_A')

cs <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Clostridium_SGB6179')

SGB5809 <-run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__GGB4266_SGB5809')

hp <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Haemophilus_parainfluenzae')

rt <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Romboutsia_timonensis')

lb <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Lachnospiraceae_bacterium')

# pc + cs + SGB5809 + hp + rt + lb
P. copri
clades
Print the results for all the P. copri clades
# all_taxa_maaslin <- combined_maaslins$all_features
# p_copri_maaslin <- all_taxa_maaslin %>% filter(grepl('_Prevotella_copri_', feature)) %>% filter(qval_bang < 0.05 | qval_mal < 0.05)
# write_csv(p_copri_maaslin, file.path(maaslin_taxonomic_output_root_folder, 'combined_maaslins_p_copri.csv'))
maaslin functional
groups
Functional groups associated with health
bang_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions")
mwi_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions", "sequencing_lane")
# nep_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions")
groups_to_analyse <- c('Acute_Typhi', 'Control_HealthySerosurvey')
bang_func_maaslin <- read_in_maaslin('Bangladesh', groups_to_analyse, bang_variables_for_analysis, 'bigmap')
mal_func_maaslin <- read_in_maaslin('Malawi', groups_to_analyse, mwi_variables_for_analysis, 'bigmap')
# combined_results <- run_combine_maaslins(bang_func_maaslin, mal_func_maaslin, bang_variables_for_analysis, mwi_variables_for_analysis, groups_to_analyse, 'bigmap', maaslin_functional_output_root_folder)
bang_mal <- list(bang_func_maaslin, mal_func_maaslin)
combined_results <- run_combine_maaslins(bang_mal, c('_bang', '_mal'), mwi_variables_for_analysis, groups_to_analyse, 'bigmap', maaslin_functional_output_root_folder)
combined_results$positive_coef %>%
select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
rename(`Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
kbl() %>% kable_styling()
|
MGC_class
|
Species
|
feature
|
Coefficient Bangladesh
|
Standard Error Bangladesh
|
Q-value Bangladesh
|
Coefficient Malawi
|
Standard Error Malawi
|
Q-value Malawi
|
|
succinate2propionate
|
Dialister_invisus_DSM_15470_genomic_scaffold
|
gb.GG698602.1.region002.GC_DNA..Entryname.succinate2propionate..OS.Dialister_invisus_DSM_15470_genomic_scaffold..SMASHregion.region002..NR.1
|
4.49
|
0.769
|
0.000238
|
4.32
|
0.937
|
0.00196
|
|
Pyruvate2acetate.formate
|
Prevotella_copri_strain_AM22.19
|
gb.QRIF01000001.1.region001.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Prevotella_copri_strain_AM22.19..SMASHregion.region001..NR.7
|
4.95
|
0.865
|
0.000259
|
4.76
|
1.23
|
0.0122
|
|
TPP_AA_metabolism
|
Prevotella_sp._S7_MS_2
|
gb.JRPT01000001.1.region001.GC_DNA..Entryname.TPP_AA_metabolism..OS.Prevotella_sp._S7_MS_2..SMASHregion.region001..NR.11..BG.2
|
4.86
|
0.89
|
0.000381
|
5.14
|
1.16
|
0.00314
|
|
Arginine2_Hcarbonate
|
.Clostridium._sordellii_genome_assembly
|
gb.LN679998.1.region007.GC_DNA..Entryname.Arginine2_Hcarbonate..OS..Clostridium._sordellii_genome_assembly..SMASHregion.region007..NR.2
|
4.59
|
0.824
|
0.000381
|
3.51
|
0.732
|
0.00116
|
|
Others_HGD_unassigned
|
Prevotella_copri_strain_AF24.12
|
gb.QRVA01000039.1.region001.GC_DNA..Entryname.Others_HGD_unassigned..OS.Prevotella_copri_strain_AF24.12..SMASHregion.region001..NR.6
|
4.72
|
0.862
|
0.000381
|
5.74
|
1.09
|
0.000359
|
|
Pyruvate2acetate.formate
|
Prevotella_sp._AM34.19LB
|
gb.QSIG01000002.1.region002.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Prevotella_sp._AM34.19LB..SMASHregion.region002..NR.3
|
4.25
|
0.783
|
0.000381
|
3.28
|
0.956
|
0.0336
|
|
TPP_fatty_acids
|
Prevotella_copri_strain_AM22.19
|
gb.QRIF01000003.1.region001.GC_DNA..Entryname.TPP_fatty_acids..OS.Prevotella_copri_strain_AM22.19..SMASHregion.region001..NR.6
|
5.07
|
0.952
|
0.000422
|
5.33
|
1.07
|
0.000706
|
|
Rnf_complex
|
Prevotella_copri_strain_AF38.11
|
gb.QROP01000022.1.region001.GC_DNA..Entryname.Rnf_complex..OS.Prevotella_copri_strain_AF38.11..SMASHregion.region001..NR.6
|
4.74
|
0.914
|
0.000649
|
5.23
|
1.12
|
0.00176
|
|
PFOR_II_pathway
|
Romboutsia_ilealis_strain_CRIB_genome
|
gb.LN555523.1.region001.GC_DNA..Entryname.PFOR_II_pathway..OS.Romboutsia_ilealis_strain_CRIB_genome..SMASHregion.region001..NR.1
|
4.84
|
0.983
|
0.00104
|
2.98
|
0.719
|
0.00629
|
|
Others_HGD_unassigned
|
Romboutsia_ilealis_strain_CRIB_genome
|
gb.LN555523.1.region003.GC_DNA..Entryname.Others_HGD_unassigned..OS.Romboutsia_ilealis_strain_CRIB_genome..SMASHregion.region003..NR.1
|
4.59
|
0.935
|
0.00105
|
2.76
|
0.558
|
0.000812
|
|
porA
|
Prevotella_copri_strain_AF10.17
|
gb.QSAV01000013.1.region001.GC_DNA..Entryname.porA..OS.Prevotella_copri_strain_AF10.17..SMASHregion.region001..NR.6
|
4.46
|
0.918
|
0.00124
|
5.08
|
1.17
|
0.00387
|
|
Rnf_complex
|
Romboutsia_ilealis_strain_CRIB_genome
|
gb.LN555523.1.region002.GC_DNA..Entryname.Rnf_complex..OS.Romboutsia_ilealis_strain_CRIB_genome..SMASHregion.region002..NR.1
|
4.36
|
0.957
|
0.0031
|
2.83
|
0.622
|
0.00228
|
|
PFOR_II_pathway
|
Romboutsia_ilealis_strain_CRIB_genome
|
gb.LN555523.1.region004.GC_DNA..Entryname.PFOR_II_pathway..OS.Romboutsia_ilealis_strain_CRIB_genome..SMASHregion.region004..NR.1
|
4.39
|
0.968
|
0.00318
|
3.37
|
0.814
|
0.00646
|
|
Formate_dehydrogenase
|
Haemophilus_sp._HMSC61B11_genomic_scaffold
|
gb.KV837963.1.region001.GC_DNA..Entryname.Formate_dehydrogenase..OS.Haemophilus_sp._HMSC61B11_genomic_scaffold..SMASHregion.region001..NR.5
|
3.41
|
0.754
|
0.00327
|
4.89
|
0.985
|
0.000767
|
|
Pyruvate2acetate.formate.Acetyl.CoA_pathway
|
Romboutsia_ilealis_strain_CRIB_genome
|
gb.LN555523.1.region005.GC_DNA..Entryname.Pyruvate2acetate.formate.Acetyl.CoA_pathway..OS.Romboutsia_ilealis_strain_CRIB_genome..SMASHregion.region005..NR.1
|
4.26
|
1
|
0.0072
|
4.58
|
1.11
|
0.00659
|
|
Pyruvate2acetate.formate
|
Clostridium_carboxidivorans
|
gb.CP011803.1.region013.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Clostridium_carboxidivorans..SMASHregion.region013..NR.1
|
3.69
|
0.89
|
0.00954
|
3.11
|
0.762
|
0.00733
|
|
OD_eut_pdu_related.PFOR_II_pathway
|
Paeniclostridium_sordellii_strain_AM370
|
gb.CP014150.1.region011.GC_DNA..Entryname.OD_eut_pdu_related.PFOR_II_pathway..OS.Paeniclostridium_sordellii_strain_AM370..SMASHregion.region011..NR.4
|
2.61
|
0.629
|
0.00954
|
3.94
|
0.574
|
4.58e-06
|
|
TPP_AA_metabolism.Arginine2putrescine
|
Haemophilus_sp._HMSC61B11_genomic_scaffold
|
gb.KV837955.1.region001.GC_DNA..Entryname.TPP_AA_metabolism.Arginine2putrescine..OS.Haemophilus_sp._HMSC61B11_genomic_scaffold..SMASHregion.region001..NR.5
|
3.93
|
0.951
|
0.00959
|
4.39
|
0.918
|
0.0012
|
|
Respiratory_glycerol
|
Haemophilus_parainfluenzae_HK2019
|
gb.AJTC01000042.1.region001.GC_DNA..Entryname.Respiratory_glycerol..OS.Haemophilus_parainfluenzae_HK2019..SMASHregion.region001..NR.5
|
3.55
|
0.882
|
0.0136
|
5.14
|
0.975
|
0.000344
|
|
Pyruvate2acetate.formate
|
Haemophilus_parainfluenzae_HK262
|
gb.AJMW01000041.1.region001.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Haemophilus_parainfluenzae_HK262..SMASHregion.region001..NR.5
|
3.17
|
0.791
|
0.0137
|
5.08
|
1.01
|
0.000657
|
|
TPP_AA_metabolism
|
Clostridium_celatum_DSM_1785_genomic_scaffold
|
gb.KB291615.1.region002.GC_DNA..Entryname.TPP_AA_metabolism..OS.Clostridium_celatum_DSM_1785_genomic_scaffold..SMASHregion.region002..NR.1
|
3.24
|
0.813
|
0.0138
|
2.5
|
0.72
|
0.0303
|
|
Arginine2putrescine.Putrescine2spermidine
|
Romboutsia_sp._Frifi_strain_FRIFI_genome
|
gb.LN650648.1.region002.GC_DNA..Entryname.Arginine2putrescine.Putrescine2spermidine..OS.Romboutsia_sp._Frifi_strain_FRIFI_genome..SMASHregion.region002..NR.1
|
2.66
|
0.683
|
0.0177
|
2.57
|
0.523
|
0.000868
|
|
Pyruvate2acetate.formate
|
Haemophilus_haemolyticus_HK386
|
gb.AJSV01000030.1.region001.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Haemophilus_haemolyticus_HK386..SMASHregion.region001..NR.2
|
2.38
|
0.631
|
0.0234
|
5.02
|
0.724
|
3.95e-06
|
|
Pyruvate2acetate.formate
|
Haemophilus_aegyptius_ATCC_11116_genomic_scaffold
|
gb.GL878527.1.region001.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Haemophilus_aegyptius_ATCC_11116_genomic_scaffold..SMASHregion.region001..NR.2
|
2.16
|
0.574
|
0.0236
|
3.52
|
0.679
|
0.000428
|
|
acetate2butyrate
|
Clostridium_botulinum_B_str._Eklund
|
gb.CP001056.1.region002.GC_DNA..Entryname.acetate2butyrate..OS.Clostridium_botulinum_B_str._Eklund..SMASHregion.region002..NR.5..BG.2
|
2.42
|
0.646
|
0.0245
|
2.53
|
0.754
|
0.0396
|
|
OD_fatty_acids
|
Dialister_succinatiphilus_YIT_11850_genomic_scaffold
|
gb.JH591188.1.region001.GC_DNA..Entryname.OD_fatty_acids..OS.Dialister_succinatiphilus_YIT_11850_genomic_scaffold..SMASHregion.region001..NR.1
|
2.94
|
0.793
|
0.0265
|
2.54
|
0.579
|
0.00342
|
|
Pyruvate2acetate.formate
|
Clostridium_butyricum_strain_JKY6D1_chromosome
|
gb.CP013352.1.region004.GC_DNA..Entryname.Pyruvate2acetate.formate..OS.Clostridium_butyricum_strain_JKY6D1_chromosome..SMASHregion.region004..NR.6
|
3.46
|
0.938
|
0.0265
|
3.35
|
0.936
|
0.0241
|
|
TPP_AA_metabolism
|
Haemophilus_sputorum_HK_2154
|
gb.ALJP01000004.1.region001.GC_DNA..Entryname.TPP_AA_metabolism..OS.Haemophilus_sputorum_HK_2154..SMASHregion.region001..NR.1
|
2.96
|
0.812
|
0.0299
|
4.52
|
0.866
|
0.000392
|
|
Fumarate2succinate
|
Haemophilus_sp._HMSC61B11_genomic_scaffold
|
gb.KV838003.1.region001.GC_DNA..Entryname.Fumarate2succinate..OS.Haemophilus_sp._HMSC61B11_genomic_scaffold..SMASHregion.region001..NR.5
|
3.34
|
0.924
|
0.0312
|
4.62
|
1.01
|
0.0022
|
|
Respiratory_glycerol
|
Aggregatibacter_sp._oral_taxon_458_str._W10330_genomic_scaffold
|
gb.KE952617.1.region001.GC_DNA..Entryname.Respiratory_glycerol..OS.Aggregatibacter_sp._oral_taxon_458_str._W10330_genomic_scaffold..SMASHregion.region001..NR.1
|
2.91
|
0.808
|
0.0323
|
4.64
|
0.926
|
0.000678
|
|
TPP_AA_metabolism
|
Haemophilus_pittmaniae_HK_85
|
gb.AFUV01000010.1.region001.GC_DNA..Entryname.TPP_AA_metabolism..OS.Haemophilus_pittmaniae_HK_85..SMASHregion.region001..NR.1
|
2.43
|
0.686
|
0.0353
|
5.22
|
0.896
|
8.17e-05
|
|
PFOR_II_pathway
|
Clostridium_celatum_DSM_1785_genomic_scaffold
|
gb.KB291660.1.region001.GC_DNA..Entryname.PFOR_II_pathway..OS.Clostridium_celatum_DSM_1785_genomic_scaffold..SMASHregion.region001..NR.1
|
3.51
|
0.994
|
0.0353
|
3.77
|
0.982
|
0.0132
|
|
acetate2butyrate
|
Clostridium_celatum_DSM_1785_genomic_scaffold
|
gb.KB291615.1.region001.GC_DNA..Entryname.acetate2butyrate..OS.Clostridium_celatum_DSM_1785_genomic_scaffold..SMASHregion.region001..NR.1
|
3.41
|
0.981
|
0.0397
|
3.41
|
0.931
|
0.0197
|
|
Rnf_complex
|
Haemophilus_parainfluenzae_HK262
|
gb.AJMW01000064.1.region001.GC_DNA..Entryname.Rnf_complex..OS.Haemophilus_parainfluenzae_HK262..SMASHregion.region001..NR.6..BG.2
|
2.8
|
0.814
|
0.0418
|
5
|
0.992
|
0.000636
|
|
OD_fatty_acids.PFOR_II_pathway
|
Clostridium_botulinum_B_str._Eklund
|
gb.CP001056.1.region005.GC_DNA..Entryname.OD_fatty_acids.PFOR_II_pathway..OS.Clostridium_botulinum_B_str._Eklund..SMASHregion.region005..NR.5..BG.2
|
2.57
|
0.772
|
0.0497
|
3.16
|
0.835
|
0.0148
|
combined_results$negative_coef %>%
select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
rename(`Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
kbl() %>% kable_styling()
|
MGC_class
|
Species
|
feature
|
Coefficient Bangladesh
|
Standard Error Bangladesh
|
Q-value Bangladesh
|
Coefficient Malawi
|
Standard Error Malawi
|
Q-value Malawi
|
|
NADH_dehydrogenase_I
|
Actinomyces_viscosus_C505_genomic_scaffold
|
gb.KI391968.1.region002.GC_DNA..Entryname.NADH_dehydrogenase_I..OS.Actinomyces_viscosus_C505_genomic_scaffold..SMASHregion.region002..NR.5
|
-3.01
|
0.574
|
0.000568
|
-2.9
|
0.796
|
0.0205
|
|
Nitrate_reductase
|
Actinomyces_johnsonii_F0510_genomic_scaffold
|
gb.KE951633.1.region001.GC_DNA..Entryname.Nitrate_reductase..OS.Actinomyces_johnsonii_F0510_genomic_scaffold..SMASHregion.region001..NR.2
|
-2.76
|
0.544
|
0.000832
|
-2.26
|
0.657
|
0.0332
|
|
Rnf_complex.TPP_AA_metabolism
|
Eubacterium_siraeum_DSM_15702_Scfld_03_40_genomic
|
gb.DS499548.1.region001.GC_DNA..Entryname.Rnf_complex.TPP_AA_metabolism..OS.Eubacterium_siraeum_DSM_15702_Scfld_03_40_genomic..SMASHregion.region001..NR.2
|
-2.44
|
0.727
|
0.047
|
-4.29
|
1.23
|
0.0298
|
combined_results$positive_coef %>% mutate(split_species = str_split(Species, '_')) %>% mutate(genus = sapply(split_species, "[[", 1)) %>% mutate(specific = sapply(split_species, "[[", 2)) %>% mutate(clean_species = paste(genus, specific, sep = ' ')) %>% select(-split_species, -genus, -specific) %>% relocate(clean_species, .after = Species) %>% group_by(MGC_class) %>% arrange(Species) %>% summarise(n = n(), Species = paste(clean_species, collapse = ', ')) %>% arrange(desc(n), MGC_class) %>% kbl() %>% kable_styling()
|
MGC_class
|
n
|
Species
|
|
Pyruvate2acetate.formate
|
7
|
Clostridium butyricum, Clostridium carboxidivorans, Haemophilus
aegyptius, Haemophilus haemolyticus, Haemophilus parainfluenzae,
Prevotella copri, Prevotella sp.
|
|
TPP_AA_metabolism
|
4
|
Clostridium celatum, Haemophilus pittmaniae, Haemophilus sputorum,
Prevotella sp.
|
|
PFOR_II_pathway
|
3
|
Clostridium celatum, Romboutsia ilealis, Romboutsia ilealis
|
|
Rnf_complex
|
3
|
Haemophilus parainfluenzae, Prevotella copri, Romboutsia ilealis
|
|
Others_HGD_unassigned
|
2
|
Prevotella copri, Romboutsia ilealis
|
|
Respiratory_glycerol
|
2
|
Aggregatibacter sp., Haemophilus parainfluenzae
|
|
acetate2butyrate
|
2
|
Clostridium botulinum, Clostridium celatum
|
|
Arginine2_Hcarbonate
|
1
|
.Clostridium. sordellii
|
|
Arginine2putrescine.Putrescine2spermidine
|
1
|
Romboutsia sp.
|
|
Formate_dehydrogenase
|
1
|
Haemophilus sp.
|
|
Fumarate2succinate
|
1
|
Haemophilus sp.
|
|
OD_eut_pdu_related.PFOR_II_pathway
|
1
|
Paeniclostridium sordellii
|
|
OD_fatty_acids
|
1
|
Dialister succinatiphilus
|
|
OD_fatty_acids.PFOR_II_pathway
|
1
|
Clostridium botulinum
|
|
Pyruvate2acetate.formate.Acetyl.CoA_pathway
|
1
|
Romboutsia ilealis
|
|
TPP_AA_metabolism.Arginine2putrescine
|
1
|
Haemophilus sp.
|
|
TPP_fatty_acids
|
1
|
Prevotella copri
|
|
porA
|
1
|
Prevotella copri
|
|
succinate2propionate
|
1
|
Dialister invisus
|
combined_results$negative_coef %>% mutate(split_species = str_split(Species, '_')) %>% mutate(genus = sapply(split_species, "[[", 1)) %>% mutate(specific = sapply(split_species, "[[", 2)) %>% mutate(clean_species = paste(genus, specific, sep = ' ')) %>% select(-split_species, -genus, -specific) %>% relocate(clean_species, .after = Species) %>% group_by(MGC_class) %>% arrange(Species) %>% summarise(n = n(), Species = paste(clean_species, collapse = ', ')) %>% arrange(desc(n), MGC_class) %>% kbl() %>% kable_styling()
|
MGC_class
|
n
|
Species
|
|
NADH_dehydrogenase_I
|
1
|
Actinomyces viscosus
|
|
Nitrate_reductase
|
1
|
Actinomyces johnsonii
|
|
Rnf_complex.TPP_AA_metabolism
|
1
|
Eubacterium siraeum
|
healthy vs
carrier
phylum plots
order_of_groups <- c("Carrier", "Healthy")
# bangladesh_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Bangladesh", group_order = order_of_groups)
# bangladesh_phyla_plot
malawi_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Malawi", group_order = order_of_groups)
nepal_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Nepal", group_order = order_of_groups)
# bangladesh_phyla_plot /
malawi_phyla_plot / nepal_phyla_plot

alpha
diversity
Alpha diversity - all countries, healthy and carrier
all_countries_healthy_carrier_alpha <- metaphlan_alpha(strataa_metaphlan_data, strataa_metaphlan_metadata, countries_of_interest = c('Malawi', 'Nepal'), groups_of_interest = c('Carrier', 'Control_HealthySerosurvey'), comparisons = list(c('Carrier', 'Control_HealthySerosurvey')))
all_countries_healthy_carrier_alpha$alpha_by_country

all_countries_healthy_carrier_alpha$alpha_anova_summary_with_var_names %>% dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>% kbl() %>% kable_styling()
|
rownames(alpha_anova_summary[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Country:Group
|
1
|
4.82
|
4.82
|
40.9
|
3.97e-09
|
significant
|
|
Group
|
1
|
1.41
|
1.41
|
11.9
|
0.000786
|
significant
|
|
Group:Age
|
1
|
0.639
|
0.639
|
5.42
|
0.0217
|
not_significant
|
|
Country:Sex:Age
|
1
|
0.467
|
0.467
|
3.96
|
0.0491
|
not_significant
|
|
Country
|
1
|
0.438
|
0.438
|
3.72
|
0.0564
|
not_significant
|
|
Sex:Group
|
1
|
0.35
|
0.35
|
2.97
|
0.0875
|
not_significant
|
|
Age
|
1
|
0.0966
|
0.0966
|
0.819
|
0.367
|
not_significant
|
|
Country:Sex:Group
|
1
|
0.0738
|
0.0738
|
0.626
|
0.43
|
not_significant
|
|
Sex:Age
|
1
|
0.0673
|
0.0673
|
0.571
|
0.451
|
not_significant
|
|
Country:Group:Age
|
1
|
0.0151
|
0.0151
|
0.128
|
0.721
|
not_significant
|
|
Country:Sex
|
1
|
0.0139
|
0.0139
|
0.118
|
0.732
|
not_significant
|
|
Sex
|
1
|
0.00909
|
0.00909
|
0.0771
|
0.782
|
not_significant
|
|
Country:Age
|
1
|
0.00766
|
0.00766
|
0.065
|
0.799
|
not_significant
|
|
Country:Sex:Group:Age
|
1
|
0.00184
|
0.00184
|
0.0156
|
0.901
|
not_significant
|
|
Sex:Group:Age
|
1
|
6.12e-06
|
6.12e-06
|
5.19e-05
|
0.994
|
not_significant
|
|
Residuals
|
111
|
13.1
|
0.118
|
NA
|
NA
|
NA
|
all_countries_healthy_carrier_alpha$alpha_plot_group

beta diversity
Carrier vs healthy.
all_countries_beta_carrier_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Malawi', 'Nepal'), c('Carrier', 'Control_HealthySerosurvey'))
all_countries_beta_carrier_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0653601
|
0.0009990
|
significant
|
|
Sex:Age
|
0.0141431
|
0.0299700
|
not_significant
|
|
Group:Age
|
0.0127135
|
0.0419580
|
not_significant
|
|
Age
|
0.0090662
|
0.2267732
|
not_significant
|
|
Sex:Group
|
0.0077753
|
0.3616384
|
not_significant
|
|
Sex
|
0.0072119
|
0.4595405
|
not_significant
|
|
Sex:Group:Age
|
0.0068119
|
0.5044955
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.8769180
|
NA
|
NA
|
# bgd_beta_carrier_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Bangladesh'), c('Carrier', 'Control_HealthySerosurvey'))
# bgd_beta_carrier_healthy$pn_res %>% kbl %>% kable_styling()
mal_beta_carrier_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Malawi'), c('Carrier', 'Control_HealthySerosurvey'))
mal_beta_carrier_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.1999878
|
0.0009990
|
significant
|
|
Sex:Age
|
0.0304467
|
0.0049950
|
significant
|
|
Age
|
0.0187062
|
0.0569431
|
not_significant
|
|
Group:Age
|
0.0147510
|
0.1398601
|
not_significant
|
|
Sex:Group
|
0.0118411
|
0.2587413
|
not_significant
|
|
Sex
|
0.0099215
|
0.3936064
|
not_significant
|
|
Sex:Group:Age
|
0.0046358
|
0.9180819
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.7097099
|
NA
|
NA
|
nep_beta_carrier_healthy <- metaphlan_beta(strataa_metaphlan_data, strataa_metaphlan_metadata, c('Nepal'), c('Carrier', 'Control_HealthySerosurvey'))
nep_beta_carrier_healthy$pn_res %>% kbl %>% kable_styling()
|
|
R2
|
Pr(>F)
|
is_it_significant
|
|
Group
|
0.0836999
|
0.0009990
|
significant
|
|
Sex:Group
|
0.0241616
|
0.2667333
|
not_significant
|
|
Sex:Age
|
0.0240379
|
0.2747253
|
not_significant
|
|
Age
|
0.0195876
|
0.4765235
|
not_significant
|
|
Sex
|
0.0174225
|
0.5934066
|
not_significant
|
|
Sex:Group:Age
|
0.0149025
|
0.7772228
|
not_significant
|
|
Group:Age
|
0.0098712
|
0.9760240
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residual
|
0.8063168
|
NA
|
NA
|
all_countries_beta_carrier_healthy$pc12 + mal_beta_carrier_healthy$pc12 + nep_beta_carrier_healthy$pc12

maaslin taxonomic
groups
groups_to_analyse <- c('Carrier', 'Control_HealthySerosurvey')
# bang_variables_for_analysis <- c("Group", "Sex", "Age")
mwi_variables_for_analysis <- c("Group", "Sex", "Age", "sequencing_lane")
nep_variables_for_analysis <- c("Group", "Sex", "Age")
# bangladesh_taxonomic_maaslin <- read_in_maaslin('Bangladesh', groups_to_analyse, bang_variables_for_analysis, 'metaphlan')
malawi_taxonomic_maaslin <- read_in_maaslin('Malawi', groups_to_analyse, mwi_variables_for_analysis, 'metaphlan')
nepal_taxonomic_maaslin <- read_in_maaslin('Nepal', groups_to_analyse, nep_variables_for_analysis, 'metaphlan')
# bangladesh_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(bangladesh_taxonomic_maaslin)
malawi_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(malawi_taxonomic_maaslin)
nepal_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(nepal_taxonomic_maaslin)
# bangladesh_maaslin_stats <- basic_maaslin_stats(bangladesh_taxonomic_maaslin_filtered, 'Bangladesh', bang_variables_for_analysis, groups_to_analyse)
malawi_maaslin_stats <- basic_maaslin_stats(malawi_taxonomic_maaslin_filtered, 'Malawi', mwi_variables_for_analysis, groups_to_analyse)
nepal_maaslin_stats <- basic_maaslin_stats(nepal_taxonomic_maaslin_filtered, 'Nepal', nep_variables_for_analysis, groups_to_analyse)
# / bangladesh_maaslin_stats$volcano_plot
malawi_maaslin_stats$volcano_plot / nepal_maaslin_stats$volcano_plot

nrow(malawi_maaslin_stats$maaslin_results_sig)
## [1] 117
# nrow(bangladesh_maaslin_stats$maaslin_results_sig)
nrow(nepal_maaslin_stats$maaslin_results_sig)
## [1] 1
We’re not including the bangladesh samples in this analysis, because
the bangladesh carriers were processed differently (extracted without
being frozen).
There are no species significantly associated with carrier status in
all Mal and Nep, so not including the combine.
# combined_results <- run_combine_maaslins(bangladesh_taxonomic_maaslin_filtered, malawi_taxonomic_maaslin_filtered, bang_variables_for_analysis, mwi_variables_for_analysis, groups_to_analyse, 'metaphlan', maaslin_taxonomic_output_root_folder)
# only one species significantly associated in Nepal.
# there are no species consistently associated across all three sites
# bang_mal_nep <- list(bangladesh_taxonomic_maaslin_filtered, malawi_taxonomic_maaslin_filtered, nepal_taxonomic_maaslin_filtered)
# combined_results <- run_combine_maaslins(bang_mal_nep, c('_bang', '_mwi', '_nep'), mwi_variables_for_analysis, groups_to_analyse, 'metaphlan', maaslin_taxonomic_output_root_folder)
# View(combined_results$positive_coef)
# View(combined_results$negative_coef)
# there are no species associated in both bang and nep
# bang_nep <- list(bangladesh_taxonomic_maaslin_filtered, nepal_taxonomic_maaslin_filtered)
# combined_results <- run_combine_maaslins(bang_nep, c('_bang', '_nep'), bang_variables_for_analysis, groups_to_analyse, 'metaphlan', maaslin_taxonomic_output_root_folder)
# View(combined_results$positive_coef)
# View(combined_results$negative_coef)
# there are no species associated in both mal and nep
# mal_nep <- list(malawi_taxonomic_maaslin_filtered, nepal_taxonomic_maaslin_filtered)
# combined_results <- run_combine_maaslins(bang_nep, c('_mal', '_nep'), bang_variables_for_analysis, groups_to_analyse, 'metaphlan', maaslin_taxonomic_output_root_folder)
# View(combined_results$positive_coef)
# View(combined_results$negative_coef)
# bang_mal <- list(bangladesh_taxonomic_maaslin_filtered, malawi_taxonomic_maaslin_filtered)
# combined_results <- run_combine_maaslins(bang_mal, c('_bang', '_mal'), mwi_variables_for_analysis, groups_to_analyse, 'metaphlan', maaslin_taxonomic_output_root_folder)
# # positive is associated with health
# combined_results$positive_coef %>% filter(grepl('^s', lowest_taxonomic_level)) %>%
# select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
# rename(Species = lowest_taxonomic_level, `Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
# dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
# kbl() %>% kable_styling()
# # negative is associated with carrier
# combined_results$negative_coef %>% filter(grepl('^s', lowest_taxonomic_level)) %>%
# select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
# rename(Species = lowest_taxonomic_level, `Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
# dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
# kbl() %>% kable_styling()
# nrow(combined_results$mwi_maaslin_only)
# nrow(combined_results$bang_maaslin_only)
functional
groups
# bang_variables_for_analysis <- c("Group", "Sex", "Age")
mwi_variables_for_analysis <- c("Group", "Sex", "Age", "sequencing_lane")
# nep_variables_for_analysis <- c("Group", "Sex", "Age", "Antibiotics_taken_before_sampling_yes_no_assumptions")
groups_to_analyse <- c('Carrier', 'Control_HealthySerosurvey')
# bang_func_carr_health_maaslin <- read_in_maaslin('Bangladesh', groups_to_analyse, bang_variables_for_analysis, 'bigmap')
mal_func_carr_health_maaslin <- read_in_maaslin('Malawi', groups_to_analyse, mwi_variables_for_analysis, 'bigmap')
# combined_results <- run_combine_maaslins(bang_func_carr_health_maaslin, mal_func_carr_health_maaslin, bang_variables_for_analysis, mwi_variables_for_analysis, groups_to_analyse, 'bigmap', maaslin_functional_output_root_folder)
# bang_mal <- list(bang_func_carr_health_maaslin, mal_func_carr_health_maaslin)
# combined_results <- run_combine_maaslins(bang_mal, c('_bang', '_mal'), mwi_variables_for_analysis, groups_to_analyse, 'bigmap', maaslin_functional_output_root_folder)
# # positive is associated with health
# # combined_results$positive_coef %>% kbl() %>% kable_styling()
# combined_results$positive_coef %>%
# select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
# rename(`Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
# dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
# kbl() %>% kable_styling()
# # negative is associated with carrier
# # combined_results$negative_coef %>% kbl() %>% kable_styling()
# combined_results$negative_coef %>%
# select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
# rename(`Coefficient Bangladesh` = coef_bang, `Standard Error Bangladesh` = stderr_bang, `Q-value Bangladesh` = qval_bang, `Coefficient Malawi` = coef_mal, `Standard Error Malawi` = stderr_mal, `Q-value Malawi` = qval_mal) %>%
# dplyr::mutate_if(is.numeric, funs(as.character(signif(., 3)))) %>%
# kbl() %>% kable_styling()
# # print out a nicely formatted table of the count of the MGCs
# combined_results$positive_coef %>% mutate(split_species = str_split(Species, '_')) %>% mutate(genus = sapply(split_species, "[[", 1)) %>% mutate(specific = sapply(split_species, "[[", 2)) %>% mutate(clean_species = paste(genus, specific, sep = ' ')) %>% select(-split_species, -genus, -specific) %>% relocate(clean_species, .after = Species) %>% group_by(MGC_class) %>% arrange(Species) %>% summarise(n = n(), Species = paste(clean_species, collapse = ', ')) %>% arrange(desc(n), MGC_class) %>% kbl() %>% kable_styling()
# combined_results$negative_coef %>% mutate(split_species = str_split(Species, '_')) %>% mutate(genus = sapply(split_species, "[[", 1)) %>% mutate(specific = sapply(split_species, "[[", 2)) %>% mutate(clean_species = paste(genus, specific, sep = ' ')) %>% select(-split_species, -genus, -specific) %>% relocate(clean_species, .after = Species) %>% group_by(MGC_class) %>% arrange(Species) %>% summarise(n = n(), Species = paste(clean_species, collapse = ', ')) %>% arrange(desc(n), MGC_class) %>% kbl() %>% kable_styling()